# Environment for the API-server launch further down in this script.
# Every value is hard-coded for one specific host; adjust before reuse.

# Both socket-interface settings name the same network interface.
export GLOO_SOCKET_IFNAME=enp67s0f5 TP_SOCKET_IFNAME=enp67s0f5

# Eight device ids (0-7), the same count as the --tp value given to the launcher.
export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# vLLM switches: V1 flag on, worker multiprocessing method "fork", MC2 flag off.
export VLLM_USE_V1=1 VLLM_WORKER_MULTIPROC_METHOD=fork VLLM_ENABLE_MC2=0

# LCCL communication flag off.
# NOTE(review): the component that reads USING_LCCL_COM is not visible here.
export USING_LCCL_COM=0

# Pangu switch on.
# NOTE(review): presumably selects the Pangu model path in the serving stack; confirm.
export OMNI_USE_PANGU=1

# Launch the API server through start_api_servers.py (resolved relative to the
# current working directory).
#
# Arguments passed, in order:
#   --num-servers / --base-api-port : one server, base API port 9556
#   --model-path / --served-model-name : weights directory and the name it is served under
#   --master-ip / --master-port     : master address 7.150.13.168:35678
#   --tp                            : 8, the same count as the visible device list
#   --log-dir                       : relative directory "apiserverlog"
#   --extra-args                    : one string holding additional flags
#                                     (presumably forwarded to the underlying server; confirm)
#   --additional-config             : JSON string; enable_hybrid_graph_mode is switched on
#                                     for the mixed (co-located) deployment mode, with
#                                     expert_parallel_size 8 and expert_tensor_parallel_size 1
python start_api_servers.py \
  --num-servers 1 \
  --model-path /data/weights/Pangu_R-72BA16B_3.0.1.1_SP4_128K \
  --master-ip 7.150.13.168 \
  --tp 8 \
  --master-port 35678 \
  --served-model-name pangu72B \
  --log-dir apiserverlog \
  --extra-args '--enforce-eager --enable-expert-parallel' \
  --base-api-port 9556 \
  --additional-config '{"enable_hybrid_graph_mode": true, "expert_parallel_size": 8, "expert_tensor_parallel_size": 1}'
